library(parallel)
library(doMC)
library(DHARMa)
library(broom.mixed)
library(gratia)
library(MuMIn)
library(mgcv)
library(bbmle)
library(lme4)
library(AICcmodavg)
library(merTools)
library(magrittr)
library(gridExtra)
library(ggplot2); theme_set(theme_classic())
library(tidyr)
library(purrr)
library(plyr)
library(dplyr)
table 1: Variáveis utilizadas na descrição estatística
| code | description | class | range |
|---|---|---|---|
| n_nRef | number of SADs not refuted by the KS bootstrap test with critical p-value 0.05 | continuous | (0 ; 100) |
| diffS_mean | diffS = (S_MN - S_obs) / S_obs, diffS_mean = mean(diffS); S_MN = species richness estimated by MN | continuous | (-0.977 ; 4.78) |
| U_med | average of 10 replicates of the speciation rate estimated by MNEE | continuous | (8.860e-5 ; 4.221e-2) |
| p | proportion of tree cover | continuous | (0.013 ; 0.961) |
| MN | Neutral Model (EE = spatial explicit; EI = spatial implict) | category | 2 levels |
| d | mean dispersal distance (meters) | continuous | (0.456 ; 19.183) |
| k | proportion of propagules until the nearest neighborhood seq(0.99 : 0.05) | category | 20 levels |
| d_Lplot | mean dispersal distance / side of the sample area | continuous | (0.02 ; 0.192) |
| S_obs | observed species richness | integer | (26 ; 458) |
| Ntotal | number of individuals in the sample area | integer | (649 ; 12105) |
| SiteCode | sample area code | category | 103 levels |
Figure 1 Possible Predictor Variables
Figura 2.1 número de SADs não refutadas ~ p * k * MN. A linha azul é uma estimativa baseada em ‘loess’.
Figura 2.2 número de SADs não refutadas ~ (d * MN|Site ~ p). Site está ordenado pelo valor de p.
Figura 2.3 n_nRef ~ variáveis categoricas
linear term
random term
# dados
df_md <- df_resultados %>%
select(n_nRef,p,SiteCode,MN,p.z,k,d.z,d_Lplot.z)
df_md[,6:8] <- apply(df_md[,6:8],2,as.character)
df_md %<>%
gather(key = var_dispersao, value = value_dispersao, k:d_Lplot.z)
# funcao ajuste dos modelos
f_md <- function(dados){
var_dispersao <- unique(dados$var_dispersao)
if(var_dispersao == "k"){
l_md <- vector("list",2)
names(l_md) <- c(paste0(var_dispersao," 1|Site"),
paste0(var_dispersao," MN|Site"))
dados$value_dispersao <- factor(dados$value_dispersao,levels = unique(dados$value_dispersao)[20:1])
l_md[[1]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * value_dispersao * MN +
(1|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
l_md[[2]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * value_dispersao * MN +
(MN|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
}else{
dados$value_dispersao <- as.numeric(dados$value_dispersao)
l_md <- vector("list",3)
names(l_md) <- c(paste0(var_dispersao," 1|Site"),
paste0(var_dispersao," MN|Site"),
paste0(var_dispersao," (",var_dispersao,"+",var_dispersao,"^2)*MN|Site"))
l_md[[1]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (value_dispersao + I(value_dispersao^2)) * MN +
(1|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
l_md[[2]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (value_dispersao + I(value_dispersao^2)) * MN +
(MN|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
l_md[[3]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (value_dispersao + I(value_dispersao^2)) * MN +
( (value_dispersao + I(value_dispersao^2) ) * MN|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
}
return(l_md)
}
registerDoMC(3)
l_nRef__modeloCheio <- dlply(df_md,.(var_dispersao),f_md,.parallel = TRUE)
names(l_nRef__modeloCheio) <- NULL
l_nRef__modeloCheio <- do.call(c,l_nRef__modeloCheio)
#
f_warning <- function(glmm_object){
# glmm_object <- l_nRef__modeloCheio[[4]]
v_message <- glmm_object@optinfo$conv$lme4$messages %>%
as.character()
if(length(v_message)==0){v_message <- "OK"}
if(length(v_message)>1){v_message <- v_message[1] }
return(v_message)
}
df_auditoria.md <- ldply(l_nRef__modeloCheio,f_warning,.id="glmer") %>%
rename(warning_message = V1)
Table 2.1 Modelos Cheios estimados e avisos de convergência
| glmer | warning_message |
|---|---|
| d_Lplot.z 1|Site | Model failed to converge with max|grad| = 0.00343548 (tol = 0.002, component 1) |
| d_Lplot.z MN|Site | Model failed to converge with max|grad| = 0.00836802 (tol = 0.002, component 1) |
| d_Lplot.z (d_Lplot.z+d_Lplot.z^2)*MN|Site | OK |
| d.z 1|Site | Model failed to converge with max|grad| = 40.1827 (tol = 0.002, component 1) |
| d.z MN|Site | Model failed to converge with max|grad| = 0.00746139 (tol = 0.002, component 1) |
| d.z (d.z+d.z^2)*MN|Site | OK |
| k 1|Site | OK |
| k MN|Site | OK |
i <- 1
names(l_nRef__modeloCheio[v_glmerUpdate])[i]
#
## update1:
md_allFit <- allFit(l_nRef__modeloCheio[v_glmerUpdate][[i]],maxfun = 1e5, parallel = 'multicore', ncpus = 3)
1) d_Lplot 1|Site
7 optimizer(s) failed
2) d_Lplot MN|Site
7 optimizer(s) failed
3) d 1|Site
7 optimizer(s) failed
4) d MN|Site
7 optimizer(s) failed
# dados
df_md <- df_resultados %>%
select(n_nRef,p,SiteCode,MN,p.z,k,d.z,d_Lplot.z)
df_md[,6:8] <- apply(df_md[,6:8],2,as.character)
df_md %<>%
gather(key = var_dispersao, value = value_dispersao, k:d_Lplot.z)
# funcao ajuste dos modelos
f_md <- function(dados){
var_dispersao <- unique(dados$var_dispersao)
if(var_dispersao == "k"){
l_md <- vector("list",2)
names(l_md) <- c(paste0(var_dispersao," 1|Site"),
paste0(var_dispersao," MN|Site"))
dados$value_dispersao <- factor(dados$value_dispersao,levels = unique(dados$value_dispersao)[20:1])
l_md[[1]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * value_dispersao * MN +
(1|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
l_md[[2]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * value_dispersao * MN +
(MN|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
}else{
dados$value_dispersao <- as.numeric(dados$value_dispersao)
l_md <- vector("list",4)
names(l_md) <- c(paste0(var_dispersao," 1|Site"),
paste0(var_dispersao," MN|Site"),
paste0(var_dispersao," ",var_dispersao,"*MN|Site"),
paste0(var_dispersao,"^2 (",var_dispersao,"+",var_dispersao,"^2)*MN|Site"))
l_md[[1]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * value_dispersao * MN +
(1|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
l_md[[2]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * value_dispersao * MN +
(MN|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
l_md[[3]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * value_dispersao * MN +
(value_dispersao * MN|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
l_md[[4]] <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (value_dispersao + I(value_dispersao^2)) * MN +
( (value_dispersao + I(value_dispersao^2) ) * MN|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e9)))
}
return(l_md)
}
registerDoMC(3)
l_nRef__modeloCheio2 <- dlply(df_md,.(var_dispersao),f_md,.parallel = TRUE)
names(l_nRef__modeloCheio2) <- NULL
l_nRef__modeloCheio2 <- do.call(c,l_nRef__modeloCheio2)
#
f_warning <- function(glmm_object){
v_message <- glmm_object@optinfo$conv$lme4$messages %>%
as.character()
if(length(v_message)==0){v_message <- "OK"}
if(length(v_message)>1){v_message <- v_message[1] }
return(v_message)
}
df_auditoria.md <- ldply(l_nRef__modeloCheio2,f_warning,.id="glmer") %>%
rename(warning_message = V1)
Table 2.2 Modelos Cheios estimados e avisos de convergência
| glmer | warning_message |
|---|---|
| d_Lplot.z 1|Site | Model failed to converge with max|grad| = 0.00284637 (tol = 0.002, component 1) |
| d_Lplot.z MN|Site | OK |
| d_Lplot.z d_Lplot.z*MN|Site | OK |
| d_Lplot.z^2 (d_Lplot.z+d_Lplot.z^2)*MN|Site | OK |
| d.z 1|Site | Model failed to converge with max|grad| = 0.00369235 (tol = 0.002, component 1) |
| d.z MN|Site | OK |
| d.z d.z*MN|Site | OK |
| d.z^2 (d.z+d.z^2)*MN|Site | OK |
| k 1|Site | OK |
| k MN|Site | OK |
d_Lplot.z 1|Site
7 optimizer(s) failed
d.z 1|Site
7 optimizer(s) failed
Tabela 2.3 Comparação baseada em AICc dos modelos cheios
| GLMM | dAICc | df | weight |
|---|---|---|---|
| d^2 (d+d^2)*MN|Site | 0.0 | 39 | 1 |
| d_Lplot^2 (d_Lplot+d_Lplot^2)*MN|Site | 274.6 | 39 | <0.001 |
| d d*MN|Site | 38427.3 | 22 | <0.001 |
| d_Lplot d_Lplot*MN|Site | 38586.5 | 22 | <0.001 |
| k MN|Site | 74141.2 | 123 | <0.001 |
| d MN|Site | 95710.0 | 15 | <0.001 |
| d_Lplot MN|Site | 96279.6 | 15 | <0.001 |
| k 1|Site | 106158.0 | 121 | <0.001 |
Tabela 2.4 Coeficiente de Determinação Condicional e Marginal
## R2m R2c
## theoretical 0.3232656 0.9128366
## delta 0.3111874 0.8787302
Figura 2.3 Resíduos Quantílicos do modelo cheio plausível
Figura 2.4 Quantile-quantile plot random effects.
Figura 2.5 Predito e observado pelo modelo. Em vermelho linha y=x; em azul um modelo linear por sítio de amostragem.
Figura 2.6 Predito e observado para modelo cheio
# dados
df_md <- df_resultados %>% filter(MN=="EE") %>%
distinct() %>%
mutate(rep = 1)
df_md <- left_join(x=data.frame(rep=1,
md_class=c("glmm d+d^2|Site",
"glmm d|Site",
"glmm 1|Site")),
y=df_md,
by="rep")
# função de ajuste
f_md <- function(dados){
md_class <- unique(dados$md_class)
if(md_class == "glmm d+d^2|Site"){
md_ <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (d.z + I(d.z^2)) +
(d.z + I(d.z^2)|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e6)),
na.action = "na.fail")
}else if(md_class == "glmm d|Site"){
md_ <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (d.z + I(d.z^2)) +
(d.z|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e6)),
na.action = "na.fail")
}else{
md_ <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (d.z + I(d.z^2)) +
(1|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e6)),
na.action = "na.fail")
}
return(md_)
}
registerDoMC(3)
l_nRefEE_mdCheio <- dlply(df_md,"md_class",f_md,.parallel = TRUE)
Tabela 2.2.1 Comparação de modelos Cheios
## dAICc df weight
## glmm d+d^2|Site 0.0 15 1
## glmm d|Site 2964.4 12 <0.001
## glmm 1|Site 21907.9 10 <0.001
Tabela 2.2.2 Coeficiente de determinação condicional e marginal do modelo cheio mais plausível
## R2m R2c
## theoretical 0.06866886 0.7657465
## delta 0.06513930 0.7263873
Figura 2.2.1 Resíduos Quantílicos do modelo cheio mais plausível
Figura 2.2.2 Quantile-quantile plot random effects.
Figura 2.2.3 Predito e observado pelo modelo. Em vermelho linha y=x; em azul um modelo linear por sítio de amostragem.
Tabela 2.2.3 Modelos com maior peso de evidência
| model_code | dAICc | df | weight |
|---|---|---|---|
| 30 | 0.0 | 11 | 0.1982 |
| 62 | 1.2 | 12 | 0.1096 |
| 96 | 1.3 | 13 | 0.1023 |
| 32 | 1.3 | 12 | 0.1018 |
| 22 | 1.4 | 10 | 0.0990 |
| 224 | 2.0 | 14 | 0.0713 |
| 128 | 2.5 | 14 | 0.0571 |
| 64 | 2.5 | 13 | 0.0564 |
| 24 | 2.7 | 11 | 0.0504 |
| 88 | 2.8 | 12 | 0.0481 |
Figura 2.2.4 Observado e predito pelo modelo médio
Figura 2.2.5 Predito pelo modelo médio para novo conjunto de dados
# dados
df_md <- df_resultados %>% filter(MN=="EI") %>%
distinct() %>%
mutate(rep = 1)
df_md <- left_join(x=data.frame(rep=1,
md_class=c("glmm d+d^2|Site",
"glmm d|Site",
"glmm 1|Site")),
y=df_md,
by="rep")
# função de ajuste
f_md <- function(dados){
md_class <- unique(dados$md_class)
if(md_class == "glmm d+d^2|Site"){
md_ <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (d.z + I(d.z^2)) +
(d.z + I(d.z^2)|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e6)),
na.action = "na.fail")
}else if(md_class == "glmm d|Site"){
md_ <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (d.z + I(d.z^2)) +
(d.z|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e6)),
na.action = "na.fail")
}else{
md_ <- glmer(cbind(n_nRef,100-n_nRef) ~
(p.z + I(p.z^2)) * (d.z + I(d.z^2)) +
(1|SiteCode),
family = "binomial",data=dados,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e6)),
na.action = "na.fail")
}
return(md_)
}
registerDoMC(2)
l_nRefEI_mdCheio <- dlply(df_md,"md_class",f_md,.parallel = TRUE)
Tabela 2.3.1 Comparação de modelos Cheios
## dAICc df weight
## glmm d+d^2|Site 0.0 15 1
## glmm d|Site 8608.0 12 <0.001
## glmm 1|Site 60738.9 10 <0.001
Tabela 2.3.2 Coeficiente de determinação condicional e marginal do modelo cheio mais plausível
## R2m R2c
## theoretical 0.3531940 0.9431867
## delta 0.3175205 0.8479224
Figura 2.3.1 Resíduos Quantílicos do modelo cheio mais plausível
Figura 2.3.2 Quantile-quantile plot random effects.
Figura 2.3.3 Predito e observado pelo modelo. Em vermelho linha y=x; em azul um modelo linear por sítio de amostragem.
Tabela 2.3.3 Modelos com maior peso de evidência
| model_code | dAICc | df | weight |
|---|---|---|---|
| 80 | 0.0 | 12 | 0.1799 |
| 72 | 0.2 | 11 | 0.1645 |
| 112 | 0.7 | 13 | 0.1285 |
| 96 | 0.9 | 13 | 0.1131 |
| 88 | 1.1 | 12 | 0.1031 |
| 208 | 1.2 | 13 | 0.1004 |
| 224 | 2.1 | 14 | 0.0631 |
| 128 | 2.3 | 14 | 0.0579 |
| 240 | 2.6 | 14 | 0.0497 |
| 256 | 4.0 | 15 | 0.0244 |
Figura 2.3.4 Observado e predito pelo modelo médio
Figura 2.3.5 Predito pelo modelo médio para novo conjunto de dados
Figura 2.4.1 modavgPred(type=“response”)
Figura 2.4.2 arm::invlogit(modavgPred(type=“link”))
Janela de Código 2.4.1 modavgPred(type=“link”)
# Model Averaging for new data predictions
df_pred <- expand.grid(SiteCode=levels(df_resultados$SiteCode)[1],
d.z=seq(min(df_resultados$d.z),max(df_resultados$d.z),length=40),
p.z=seq(min(df_resultados$p.z),max(df_resultados$p.z),length=200))
l_md <- list()
l_md[[1]] <- l_md.dredge_nRefEE
l_md[[2]] <- l_md.dredge_nRefEI
names(l_md) <- c("EE","EI")
# funcao
f_modavgPred <- function(md_object, df = df_pred, type_ = "link"){
md_avg <- modavgPred(md_object, newdata = df, type = type_ ) %>%
cbind(df)
}
registerDoMC(2)
l_mdPredict <- llply(l_md,f_modavgPred,.parallel = TRUE)
Figura 2.4.2 log odds ratio auto coerência. Nas duas ultimas linhas no eixo y está a variável no respectivo histogram na primeira coluna.
#
df_plot_logOR <- data.frame(p.z = l_mdPredict[["EE"]]$p.z,
d.z = l_mdPredict[["EE"]]$d.z,
logOR_EE.EI = l_mdPredict[["EE"]]$mod.avg.pred - l_mdPredict[["EI"]]$mod.avg.pred,
lower.logOR_EE.EI = l_mdPredict[["EE"]]$lower.CL - l_mdPredict[["EI"]]$lower.CL,
upper.logOR_EE.EI = l_mdPredict[["EE"]]$upper.CL - l_mdPredict[["EI"]]$upper.CL,
logOdds_EE = l_mdPredict[["EE"]]$mod.avg.pred,
lower.logOdds_EE = l_mdPredict[["EE"]]$lower.CL,
upper.logOdds_EE = l_mdPredict[["EE"]]$upper.CL,
logOdds_EI = l_mdPredict[["EI"]]$mod.avg.pred,
lower.logOdds_EI = l_mdPredict[["EI"]]$lower.CL,
upper.logOdds_EI = l_mdPredict[["EI"]]$upper.CL)
l_p <- list()
l_p[[1]] <- df_plot_logOR[,3:5] %>%
gather(key=logOR_class, value=logOR_value,logOR_EE.EI:upper.logOR_EE.EI) %>%
ggplot(aes(x=.$logOR_value)) + labs(x="",y="") +
geom_histogram(bins=60) + facet_wrap(~logOR_class,ncol=3,dir = "v")
for(i in 3:5){
l_p[[i-1]] <- df_plot_logOR[,c(i,i+3,i+6)] %>%
gather(key = "logOdds_MN", value = "logOdds_value", names(.)[-1]) %>%
ggplot(aes_string(x=names(.)[3],y=names(.)[1])) +
geom_point(alpha=0.3) +
facet_wrap(~logOdds_MN,ncol=1) + labs(x="",y="")
}
grid.arrange(l_p[[1]],l_p[[2]],l_p[[3]],l_p[[4]],
layout_matrix = rbind(c(1,1,1),
2:4)
)
Figura 2.4.3 Log odds ratio EE/EI
figura 2.4.4 Probabilidade ~ log Odds
Figura 2.4.5 Log odds ratio não truncado e truncado
Para d = 20, qual intervalo de p logOR c [-5,5]?
## range fit lower upper
## 1 min 0.17 0.05 0.25
## 2 max 0.24 0.16 0.32
## 3 diff 0.07 0.11 0.07